---
title: "Clean SDP data for US Global Health Aid Policy and Family Planning in Sub-Saharan Africa"
output: html_document
editor_options: 
  chunk_output_type: console
---

```{r setup}
# load libraries and source functions 
source(here::here("scripts/functions.R"))
setup_plgha(tidylog = TRUE)



```

# Setup
Use PMA data available from IPUMS PMA. Create an account and follow the instructions in the data README for creating an extract for a "family planning service delivery point" (aka SDP-level data) with the required variables. Update the code below with the corresponding DDI (.xml) and data (.dat.gz) files from your extract.
  
```{r}
# sdp <- read_ipums_micro(
#   ddi = here("data-raw/PMA/pma_sdp.xml"),
#   data = here("data-raw/PMA/pma_sdp.dat.gz")
# )

sdp <- here("data-raw/PMA/plgha_sdp.rds.gz") %>% read_rds()

```


## Drop unused variables

I pulled variables by topic, so there are some in the extract that I drop right away (doubting they will prove useful for this analysis):

```{r filter}
dat <- sdp %>% 
  select(-c(
    starts_with("GEO"),
    starts_with("EASERVED") & ends_with("UR"),
    EASERVED,
    STRATA,
    FACILITYTYPE,
    FACPOSITIONGEN,
    KNOWSPOP,
    FPVTOTMO,
    MOBILENUMMO,
    COUNTRYNUM,
    HWNEARSAN,
    RUNWTRPRES,
    SOAPPRES,
    STOREDWTRPRES,
    FPADOLPROV,
    FPADOLCOUNSEL,
    FPADOLPRESC,
    IUDINSERT,
    IUDREMOVE,
    IUDSPEC,
    IUDTEN,
    IUDCLAMP,
    IUDFORCEP,
    IUDUTERINE,
    IMPINSERT,
    IMPINSTODAY,
    IMPRMVTODAY,
    IMPRMVNPTODAY,
    IMPREMOVE,
    IMPRMVNPREFERKNO,
    IMPSANES,
    IMPSANTISEP,
    IMPSBLADE,
    IMPSGAUZE,
    IMPSGLOVE,
    IMPSPACK,
    FPRECOBS,
    FEEDISPLAY,
    FPSTOREDFLOOR,
    FPSTOREDPESTS,
    FPSTOREDSUN,
    FPSTOREDWATER,
    MIFAVGSOLDMO,
    MISAVGSOLDMO, 
    MMAVGSOLDMO,
    MIFSOLDMO, 
    MISSOLDMO, 
    MMSOLDMO,
    MIFEOBS,
    MISOBS,
    starts_with("DISP"),
    DISMANESTERIAL,
    starts_with("FPRM"), FPROOMTIDY,
    BEDS, HANDWASHNUM,
    HIVFPPRESC, HIVFPCOUNSEL, INJCOUNSEL1MO, INJCOUNSEL3MO,
    HANDWASHNUM, CHVNUM, CHVFPNUM,
    HIVFPPROV,
    SDPOPENYR, DAYSOPEN, FPBEGINPROVYR, FPDAYS, FPPOSTPTLEAVE,
    FPPOSTPTTODAY, FPFEESPOSTED, OFFERSTISERV,
    POPULATION, HIVVISINTENT, HIVINFO, HIVVISFP, HIVCON, 
    HIVFPOTH, HIVREFERPLACE, HIVVISDUAL, HIVVISFPSIDE,
    ends_with("FEE") & !CONSULTFEE,
    ends_with("COUNSEL"),
    ends_with("PRESC"),
    matches("EASERV")
  )) 
```

## "Pack" related variables
Due to number of variables here, we **pack similar variables** with [tidyr::pack](https://tidyr.tidyverse.org/reference/pack.html). Mostly, variables that are packed together come from the same "select multiple" question (e.g. "Which of these methods are provided here?"). In some cases, variables that are packed together may *not* come from the same question, but instead contain similar information (e.g. counts of operational resources, services provided, etc). 

```{r pack}
dat <- dat %>% 
  pack(
    EASERVED = matches("EASERVED[0-9]*$"),
    
    # Family Planning Methods data
    FPPROV = ends_with("PROV") & !ANCPROV & 
      !POSTABSERPROV & !DELIVSERPROV & !PNCPROV,
    FPCUROUT = ends_with("OBS"),
    FPCHARGE = ends_with("CHARGE"),
    FPOUTYR = ends_with("OUTYR"),
    FPOUT3MO = ends_with("OUT3MO"),
    FPOUT6MO = ends_with("OUT6MO"),
    FPOUTDAY = ends_with("OUTDAY"),
    FPSOLDMO = ends_with("SOLDMO"),
    FPNEW_NUMMO = CONNVNUMMO | CYCBNVNUMMO | DEPONNUMMO | DIANVNUMMO | 
      EMRGNVNUMMO | FCNNUMMO | FJNVNUMMO | FSTNVNUMMO | IMPNVNUMMO | 
      INJNVNUMMO | IUDNVNUMMO | MSTNVNUMMO | NTABNNUMMO | OTHNNUMMO | 
      PILLNVNUMMO | PROPILLNNUMMO | SAYNNUMMO | INJ1NVISIT | INJ3NVISIT, 
    FPANY_NUMMO = CONVNUMMO | CYCBVNUMMO | DEPOVNUMMO | DIAVNUMMO | 
      EMRGVNUMMO | FCVNUMMO | FJVNUMMO | FSTVNUMMO | IMPVNUMMO | 
      INJVNUMMO | IUDVNUMMO | MSTVNUMMO | NTABVNUMMO | OTHVNUMMO | 
      PILLVNUMMO | PROPILLVNUMMO | SAYVNUMMO | INJ1VISIT | INJ3VISIT,

    # Staffing
    STAFFPRES = ends_with("PRES"),
    STAFFNUM = ends_with("NUM"),

    # Physical resources
    HAS = starts_with("HAS"),
    
    # HIV 
    HIV = starts_with("HIV"),
    
    # Services Provided
    SERVICES_PROV = ANCPROV | POSTABSERPROV | DELIVSERPROV | PNCPROV | FPCHV | 
      FPPOSTAB | FPPNC | OFFERHIVSERV,
    
    # CHVs
    CHV = FPCHV | CHVCON | CHVINJ | CHVPILL,
    
    # Mobile Outreach
    MOBILE = starts_with("OUTREACH")
)
```

# Collapse Injectable Types 

```{r}
# Make logical from binaries: if any inj -> TRUE
dat <- dat %>% 
  mutate(across(
    c(FPPROV),
    ~.x %>% 
      mutate(across(injs(), ~.x == 1)) %>% 
      mutate(INJANY = case_when(
        if_any(injs()) ~ TRUE,
        if_any(injs(), ~.x == FALSE) ~ FALSE
      ))
  )) 

# CHARGE should be NA if injs aren't provided
dat <- dat %>%
  mutate(
    FPCHARGE = FPCHARGE %>% 
      mutate(
        INJCHARGE = case_when(FPPROV$INJPROV == 1 ~ INJCHARGE == 1),
        INJ1CHARGE = case_when(FPPROV$INJ1PROV == 1 ~ INJ1CHARGE == 1), 
        INJ3CHARGE = case_when(FPPROV$INJ3PROV == 1 ~ INJ3CHARGE == 1),
        DEPOCHARGE = case_when(FPPROV$DEPOPROV == 1 ~ DEPOCHARGE == 1),
        SAYCHARGE = case_when(FPPROV$SAYPROV == 1 ~ SAYCHARGE == 1),
        INJANY = case_when(
          if_any(injs()) ~ TRUE,
          if_any(injs(), ~.x == FALSE) ~ FALSE
        )
      )
  ) 

# Make logical from 3 response OBS vars: if any provided injs were out -> TRUE
dat <- dat %>% 
  mutate(
    FPCUROUT = FPCUROUT %>% 
      mutate(
        INJOBS = case_when(FPPROV$INJPROV == 1 ~ INJOBS > 1),
        INJ1OBS = case_when(FPPROV$INJ1PROV == 1 ~ INJ1OBS > 1), 
        INJ3OBS = case_when(FPPROV$INJ3PROV == 1 ~ INJ3OBS > 1),
        DEPOOBS = case_when(FPPROV$DEPOPROV == 1 ~ DEPOOBS > 1),
        SAYOBS = case_when(FPPROV$SAYPROV == 1 ~ SAYOBS > 1),
        INJANY = case_when(
          if_any(injs()) ~ TRUE,
          if_any(injs(), ~.x == FALSE) ~ FALSE
        )
      )
  ) 


# Recent stockouts (3, 6, or 12 months) should be NA if injs aren't provided
# Most inj variables for 6MO and 12MO don't event exist, so we have to do this 
# without help from `across` 
dat %>% 
  transmute(across(
    c(FPOUT3MO, FPOUT6MO, FPOUTYR), 
    ~.x %>% select(injs())  %>% rename_with(~str_remove(.x, "OUT.*"), injs())
  )) %>% 
  unpack(everything(), names_sep =  "_") %>% 
  names()

dat <- dat %>% 
  mutate(
    across(
      c(FPOUT3MO, FPOUT6MO, FPOUTYR), 
      ~.x %>% rename_with(~str_remove(.x, "OUT.*"), injs())
    ),
    FPOUT3MO = FPOUT3MO %>%
      mutate(
        INJ = case_when(FPPROV$INJPROV == 1 ~ FPCUROUT$INJOBS | INJ > 0),
        INJ1 = case_when(FPPROV$INJ1PROV == 1 ~ FPCUROUT$INJ1OBS | INJ1 > 0),
        INJ3 = case_when(FPPROV$INJ3PROV == 1 ~ FPCUROUT$INJ3OBS | INJ3 > 0),
        DEPO = case_when(FPPROV$DEPOPROV == 1 ~ FPCUROUT$DEPOOBS | DEPO > 0),
        SAY = case_when(FPPROV$SAYPROV == 1 ~ FPCUROUT$SAYOBS | SAY > 0),
        INJANY = case_when(
          if_any(injs()) ~ TRUE,
          if_any(injs(), ~.x == FALSE) ~ FALSE
        )
      ),
    FPOUT6MO = FPOUT6MO %>% 
      mutate(
        INJ1 = case_when(FPPROV$INJ1PROV == 1 ~ FPCUROUT$INJ1OBS | INJ1 > 0),
        INJ3 = case_when(FPPROV$INJ3PROV == 1 ~ FPCUROUT$INJ3OBS | INJ3 > 0),
        INJANY = case_when(
          if_any(injs()) ~ TRUE,
          if_any(injs(), ~.x == FALSE) ~ FALSE
        )
      ),
    FPOUTYR = FPOUTYR %>% 
      mutate(
        INJ = case_when(FPPROV$INJPROV == 1 ~ FPCUROUT$INJOBS | INJ > 0),
        INJ1 = case_when(FPPROV$INJ1PROV == 1 ~ FPCUROUT$INJ1OBS | INJ1 > 0),
        INJ3 = case_when(FPPROV$INJ3PROV == 1 ~ FPCUROUT$INJ3OBS | INJ3 > 0),
        INJANY = case_when(
          if_any(injs()) ~ TRUE,
          if_any(injs(), ~.x == FALSE) ~ FALSE
        )
      )
  ) 

# Mean length of stockout among all injectables currently out of stock 
dat$FPOUTDAY <- dat$FPOUTDAY %>% 
  mutate(
    across(injs(), ~case_when(.x < 9990 ~ .x %>% zap_labels)),
    INJANY = case_when(
      !if_all(injs(), is.na) ~ 
        cur_data() %>% select(injs()) %>% rowMeans(na.rm = T)
    )
  ) 

# Sum of visits / sales for all injectables 
dat <- dat %>% 
  mutate(across(
    c(FPNEW_NUMMO, FPANY_NUMMO, FPSOLDMO),
    ~.x %>% 
      mutate(
        across(injs(), function(var){
          var %>% 
            lbl_na_if(~.val %in% attr(var, "labels", exact = TRUE)) %>% 
            zap_label()
        }),
        INJANY = case_when(
          !if_all(injs(), is.na) ~ 
            cur_data() %>% select(injs()) %>% rowSums(na.rm = TRUE)
        )
      ) 
  )) 
```


# Facility ID 
Some facilities from Ethiopia 2019 are missing a `FACILITYID` - these values are coded -9 in the raw data, but IPUMS normally recodes them as `NA`. 

```{r}
dat <- dat %>% 
  mutate(
    FACILITYID = FACILITYID %>%
      na_if(-9) %>% 
      na_if(-6) %>% 
      lbl_na_if(~.lbl %in% ipums_val_labels(FACILITYID)$lbl) %>% 
      zap_labels()
  ) 
```

Because `FACILITYID` can also appear multiple times (when a facility is sampled in multiple rounds), I'll also create `INTSQID` for some data manipulation tasks: this will uniquely identify each interview. 

```{r}
dat <- dat %>% rowid_to_column("INTSQID")
```


# Dates

## Replace missing

There are two facilities where `INTSQYEAR == 9000 [Logical edit - missing]`. We impute those from the other facilities in the same sample. 

```{r, include=FALSE}
dat %>% 
  filter(INTSQYEAR == 9000) %>% 
  count(SAMPLE)
dat %>% 
  filter(SAMPLE %in% c(23101, 40403)) %>% 
  group_by(SAMPLE) %>% 
  count(INTSQMON, INTSQYEAR)
dat <- dat %>%
  mutate(INTSQYEAR = case_when(
    INTSQYEAR == 9000 & SAMPLE == 23101 ~ 2014L,
    INTSQYEAR == 9000 & SAMPLE == 40403 ~ 2015L,
    T ~ INTSQYEAR %>% as.integer()
  )) 
```

There are also 10 facilities where the interview date "January 2012" appears for samples representing years 2013 to 2016. These are not valid dates, so we replace them with the median month and year for those samples.
 
```{r}
samps <- dat %>% 
  filter(INTSQMON == 1 & INTSQYEAR == 2012) %>% 
  select(SAMPLE, INTSQID)

# Show samples with invalid interview dates:
samps 

# Show counts of INTSQMON and INTSQYEAR to preview the median for each sample:
dat %>% 
  filter(SAMPLE %in% samps$SAMPLE) %>% 
  group_by(SAMPLE) %>% 
  count(INTSQMON, INTSQYEAR) %>%
  print(n = Inf)
  
# Calculate those median values:
meds <- dat %>% 
  filter(SAMPLE %in% samps$SAMPLE) %>% 
  group_by(SAMPLE) %>% 
  summarise(
    .groups = "keep", 
    across(c(INTSQMON, INTSQYEAR), ~.x %>% median() %>% as.integer())
  ) %>% 
  set_value_labels(INTSQMON = attr(dat$INTSQMON, "labels")) %>% 
  ungroup() %>% 
  left_join(samps, by = "SAMPLE") 

meds

# Update `dat` rows with median values
dat <- dat %>% rows_update(meds, by = c("INTSQID", "SAMPLE"))

```

## Date variables 

  * `SURVEY_YEAR` is the *nominal* survey `YEAR` (in fact, interviews may have been collected in the later months of the prior year, or the early months of the following year)
  * `YEAR` is the *calendar year of the interview* in `INTSQYEAR`, except if the interview occurred in 2020: in that case, we use December 2019 
  * `INTSQMON` is the *calendar month of the interview*, except if the interview occurred in 2020: in that case, we use December 2019
  * `INSQCMC` is the CMC for the interview

Now we can create `INTSQCMC`

```{r, include=FALSE}
dat <- dat %>% 
  rename(
    SURVEY_YEAR = YEAR,
    YEAR = INTSQYEAR
  ) %>%
  mutate(
    INTSQMON = as.numeric(INTSQMON),
    INTSQMON = case_when(
       YEAR == 2020 ~ 12,
       T ~ INTSQMON
    ),
    YEAR = case_when(
      YEAR == 2020 ~ SURVEY_YEAR,
      T ~ YEAR
    )
  ) %>%
  set_value_labels(INTSQMON = attr(dat$INTSQMON, "labels")) %>%
  mutate(
    INTSQCMC = 12*(YEAR - 1900) + INTSQMON, .before = INTSQMON
  )

dat <- dat %>% 
  set_variable_labels(
    INTSQMON = "SDP interview month", 
    YEAR = "SDP interview year",
    INTSQCMC = "SDP interview CMC"
  ) 
```



# Exposure
Merge in exposure classification (country level).
```{r, include=FALSE}
dat <- read_rds("data-clean/exposure.rds") %>% 
  rename_all(toupper) %>% 
  list(dat, .) %>% 
  map(~.x %>% mutate(
    COUNTRY = COUNTRY %>% 
      as_factor() %>% 
      fct_relabel(~if_else(
        str_detect(., "Congo"), 
        "Democratic Republic of the Congo", 
        .
      ))
  )) %>%
  reduce(full_join)

```

# PLGHA
Tag `PLGHA ON` periods.

```{r, include=FALSE}
PLGHA_CMC <- 12*(2017 - 1900) + 5
PLGHA_CMC_JAN17 = 12*(2017 - 1900) + 1
PLGHA_CMC_ELECT = 12*(2016 - 1900) + 11
PLGHA_CMC_JAN18 = 12*(2018 - 1900) + 1

dat <- dat %>% 
  mutate(
    PLGHA_ON = INTSQCMC >= PLGHA_CMC, .after = EXPOSURE,
    PLGHA_ON_JAN17 = INTSQCMC >= PLGHA_CMC_JAN17,
    PLGHA_ON_ELECT = INTSQCMC >= PLGHA_CMC_ELECT, 
    PLGHA_ON_JAN18 = INTSQCMC >= PLGHA_CMC_JAN18, 
  )

```



## Drop non-consenters

Drop cases if `!CONSENTSQ`

```{r}
dat <- dat %>% 
  mutate(CONSENTSQ = CONSENTSQ == 1) %>% 
  filter(CONSENTSQ)
```



#  Urban 

The variable `URBAN` indicates whether the facility was located in an "urban" area as defined by the particular country's statistical bureau.

In DRC, where all samples come from Kinshasa or Kongo Central, `URBAN` is `NA` by default. We'll instead assume that all facilities from those samples are "urban".

In 105 cases, `URBAN` is `98` for "No response or missing". There, we'll look in the cleaned "female data" file: if all of the women in a particular enumeration area share the same `URBAN` classification, we'll use that classification for the SDPs in the same area. (This classifies 31 facilities where `URBAN == 98`; all of the remaining facilities will be `NA` unless we do something clever).

```{r}
# How many are 98 and How many are NA?
dat %>% count(URBAN)

# This will classify 31 facilities using EA info from the female file 
dat <- read_rds("data-clean/plgha_woman_df.rds") %>% 
  count(country, year, eaid, urban) %>% 
  rename_with(toupper) %>% 
  group_by(COUNTRY, YEAR, EAID) %>% 
  transmute(
    FEM_URBAN = case_when(n_distinct(URBAN) == 1 ~ unique(URBAN)),
    COUNTRY = COUNTRY,
    YEAR = YEAR, 
    EAID = EAID
  ) %>% 
    ungroup() %>% 
  filter(!is.na(FEM_URBAN)) %>% 
  right_join(dat, by = c("COUNTRY", "YEAR", "EAID")) %>% 
  mutate(URBAN = case_when(
    # Use logical `FEM_URBAN` where `URBAN` is 98 
    URBAN == 98 & !is.na(FEM_URBAN) ~ FEM_URBAN == 1,
    # Use logical `URBAN` where `URBAN` is not 98 
    URBAN != 98 ~ URBAN == 1,
    # Use `TRUE` if `URBAN` is `NA` (all of these are approved DRC cases)
    is.na(URBAN) ~ TRUE
  )) %>% 
  select(-FEM_URBAN)

# There are still 74 missing: cannot classify further at this time
dat %>% count(URBAN)

dat <- dat %>% labelled::set_variable_labels(URBAN = "Urban facility")
```



# Labels for other unpacked variables 

```{r}
dat <- dat %>% 
  set_variable_labels(
    COUNTRY = "SDP country",
    SURVEY_YEAR = "SDP nominal sample year",
    EAID = "SDP enumeration area",
    SAMPLE = "SDP sample code",
    ROUND = "SDP sample round",
    CONSENTSQ = "SDP respondent consented to interview",
    FPOFFERED = "SDP offers any FP service or product"
  )
```



# General Services

Every variable packed into `SERVICES_PROV` is a binary indicator for some type of service. If the facility is NIU because facilities of its type normally do not provide the service, we mark it `FALSE`. 

Note: in some later samples, `PNCPROV` is missing from IPUMS data, but it's clear that the facility provides post-natal care in the universe for `FPPNC`. 

```{r}
# Fix PNCPROV
dat$SERVICES_PROV <- dat$SERVICES_PROV %>% 
  mutate(PNCPROV = if_else(
    is.na(PNCPROV),
    FPPNC < 99,
    PNCPROV == 1
  )) 

dat <- dat %>% 
  mutate(
    SERVICES_PROV = SERVICES_PROV %>% 
      mutate(across(everything(), ~.x == 1)) %>% 
      varlbl_suffix(
        ANCPROV = "Antenatal care",
        POSTABSERPROV = "Post-abortion care",
        DELIVSERPROV = "Delivery",
        PNCPROV = "Postnatal care",
        FPCHV = "FP offered via community health volunteers",
        FPPOSTAB = "FP offered during postabortion visits",
        FPPNC = "FP offered during postnatal visits",
        OFFERHIVSERV = "HIV services",
      )
  ) %>%
  varlbl_prefix(SERVICES_PROV = "Other Health Services")
```


# FP Method Services 

If `FPOFFERED == "Yes"`, SDPs are asked about individual FP methods that are: 

  * `PROV` - provided directly to clients 


Our analysis focuses on eight family planning methods that were included in the SDP questionnaire administered in every sample. We organize these methods into the following groups: 

  * `LONG` - long-acting methods (IUDs, implants, injectables)
  * `SHORT` - short-acting methods (male condoms, female condoms, pills, emergency methods, cycle beads)
  * `CORE8` - all of the above 
  * `EMRG` - emergency methods only 

```{r}


# set aside "any" method provision 
any <- dat$FPPROV %>% 
  rename_with(~str_remove(.x, "PROV"), everything()) %>% 
  mutate(across(everything(), ~.x == 1)) %>% 
  transmute(
    ANY = case_when(
      if_any(everything()) ~ TRUE,
      if_any(everything(), ~.x == FALSE) ~ FALSE
    ),
    ANYNUM = cur_data() %>% select(!ANY) %>% rowSums(na.rm = TRUE)
  ) %>% 
  core_lbl(
    ANY = "Any methods",
    ANYNUM = "Number of any methods"
  ) 

# provision
dat <- dat %>% 
  mutate(across(c(FPPROV), ~{
    fp_lbl = case_when(
      cur_column() == "FPPROV" ~ "providers"
    )
    fp_lbler = function(...){
      paste(..., "(core method", fp_lbl, "only)") 
    }
    .x %>% 
      core8() %>% 
      transmute(
        across(everything(), ~.x == 1),
        SHORT = case_when(
          if_any(shorts()) ~ TRUE,
          if_any(shorts(), ~.x == FALSE) ~ FALSE
        ),
        SHORTNUM = cur_data() %>% 
          select(shorts()) %>% 
          rowSums(na.rm = F),
        LONG = case_when(
          if_any(longs()) ~ TRUE,
          if_any(longs(), ~.x == FALSE) ~ FALSE
        ),
        LONGNUM = cur_data() %>% 
          select(longs()) %>% 
          rowSums(na.rm = F),
        EMRG = EMRG,
        CORE8 = case_when(
          if_any(c(SHORT, LONG)) ~ TRUE,
          if_any(c(SHORT, LONG), ~.x == FALSE) ~ FALSE
        ),
        CORE8NUM = cur_data() %>%
          select(SHORTNUM, LONGNUM) %>%
          rowSums(na.rm = F),
        SHORTC = case_when(CORE8 ~ SHORT),
        SHORTNUMC = case_when(CORE8 ~ SHORTNUM),
        LONGC = case_when(CORE8 ~ LONG),
        LONGNUMC = case_when(CORE8 ~ LONGNUM),
        EMRGC = case_when(CORE8 ~ EMRG),
        CORE8NUMC = case_when(CORE8 ~ CORE8NUM)
      ) %>% 
      core_lbl(
        SHORTC = fp_lbler("Any short-acting methods"), 
        SHORTNUMC = fp_lbler("Number of short-acting methods"),
        LONGC = fp_lbler("Any LARCs"),
        LONGNUMC = fp_lbler("Number of LARCs"),
        EMRGC = fp_lbler("Emergency methods"),
        CORE8NUMC = fp_lbler("Number of core methods")
      )
  }))



# Pack labels 
dat <- dat %>% 
  varlbl_prefix(
    FPPROV = "Provides"
  ) 
```



# Provided Methods Only

Facilities that provide a method are asked four follow-up questions: 

  * Are clients charged for this method? 
  * Is this method currently out of stock? 
  * Has the method been out of stock recently (including now)? 
  * How many new visits, total visits, any units sold in the last month? 
  
For each of these questions, we use `NA` for facilities that don't provide the method in the first place. 

## Charges for provided methods

```{r}
# charges for a provided method
dat <- dat %>%
  mutate(
    FPCHARGE = FPCHARGE %>% 
      core8() %>% 
      transmute(
        CON = case_when(FPPROV$CON == 1 ~ CON == 1),
        CYCB = case_when(FPPROV$CYCB == 1 ~ CYCB == 1),
        EMRG = case_when(FPPROV$EMRG == 1 ~ EMRG == 1),
        FC = case_when(FPPROV$FC == 1 ~ FC == 1),
        IMP = case_when(FPPROV$IMP == 1 ~ IMP == 1),
        IUD = case_when(FPPROV$IUD == 1 ~ IUD == 1),
        PILL = case_when(FPPROV$PILL == 1 ~ PILL == 1),
        INJANY,
        SHORT = case_when(
          if_any(shorts()) ~ TRUE,
          if_any(shorts(), ~.x == FALSE) ~ FALSE
        ),
        LONG = case_when(
          if_any(longs()) ~ TRUE,
          if_any(longs(), ~.x == FALSE) ~ FALSE
        ),
        EMRG = EMRG,
        CORE8 = case_when(
          if_any(c(SHORT, LONG)) ~ TRUE,
          if_any(c(SHORT, LONG), ~.x == FALSE) ~ FALSE
        )
      ) %>% 
      core_lbl(note = "if provided")
  )

dat <- dat %>% 
  varlbl_prefix(
    FPCHARGE = "Charges for"
  ) 
```

## Current stockout of provided methods

```{r}
dat <- dat %>% 
  mutate(
    FPCUROUT = FPCUROUT %>% 
      core8() %>% 
      transmute(
        CON = case_when(FPPROV$CON == 1 ~ CON > 1),
        CYCB = case_when(FPPROV$CYCB == 1 ~ CYCB > 1),
        EMRG = case_when(FPPROV$EMRG == 1 ~ EMRG > 1), 
        FC = case_when(FPPROV$FC == 1 ~ FC > 1),
        IMP = case_when(FPPROV$IMP == 1 ~ IMP > 1),
        IUD = case_when(FPPROV$IUD == 1 ~ IUD > 1),
        PILL = case_when(FPPROV$PILL == 1 ~ PILL > 1),
        INJANY,
        SHORT = case_when(
          if_any(shorts()) ~ TRUE,
          if_any(shorts(), ~.x == FALSE) ~ FALSE
        ),
        LONG = case_when(
          if_any(longs()) ~ TRUE,
          if_any(longs(), ~.x == FALSE) ~ FALSE
        ),
        EMRG = EMRG,
        CORE8 = case_when(
          if_any(c(SHORT, LONG)) ~ TRUE,
          if_any(c(SHORT, LONG), ~.x == FALSE) ~ FALSE
        )
      ) %>% 
      core_lbl(note = "if provided")
  ) 

dat <- dat %>% 
  varlbl_prefix(
    FPCUROUT = "Current stockout"
  ) 
```

## Recent / current stockout of provided methods


```{r}
# iterate over each definition of "recent" (3, 6, or 12 months)
recout <- dat %>% 
  select(FPPROV, FPCUROUT, FPOUT3MO, FPOUT6MO, FPOUTYR) %>% 
  mutate(across(
    c(FPOUT3MO, FPOUT6MO, FPOUTYR), 
    ~.x %>% 
      core8() %>% 
      mutate(
        CON = case_when(FPPROV$CON == 1 ~ FPCUROUT$CON | CON > 0),
        CYCB = case_when(FPPROV$CYCB == 1 ~ FPCUROUT$CYCB | CYCB > 0),
        EMRG = case_when(FPPROV$EMRG == 1 ~ FPCUROUT$EMRG | EMRG > 0), 
        FC = case_when(FPPROV$FC == 1 ~ FPCUROUT$FC | FC > 0),
        IMP = case_when(FPPROV$IMP == 1 ~ FPCUROUT$IMP | IMP > 0),
        IUD = case_when(FPPROV$IUD == 1 ~ FPCUROUT$IUD | IUD > 0),
        PILL = case_when(FPPROV$PILL == 1 ~ FPCUROUT$PILL | PILL > 0)
      )
  )) 

# Combine definitions of "recent"
recout <- recout %>% 
  transmute(
    CON = case_when(
      if_any(c(FPOUT3MO, FPOUT6MO, FPOUTYR), ~.x$CON) ~ TRUE,
      if_any(c(FPOUT3MO, FPOUT6MO, FPOUTYR), ~.x$CON == FALSE) ~ FALSE,
    ),
    CYCB = case_when(
      if_any(c(FPOUT3MO, FPOUT6MO, FPOUTYR), ~.x$CYCB) ~ TRUE,
      if_any(c(FPOUT3MO, FPOUT6MO, FPOUTYR), ~.x$CYCB == FALSE) ~ FALSE
    ),
    EMRG = case_when(
      if_any(c(FPOUT3MO, FPOUT6MO, FPOUTYR), ~.x$EMRG) ~ TRUE,
      if_any(c(FPOUT3MO, FPOUT6MO, FPOUTYR), ~.x$EMRG == FALSE) ~ FALSE,
    ),
    FC = case_when(
      if_any(c(FPOUT3MO, FPOUT6MO, FPOUTYR), ~.x$FC) ~ TRUE,
      if_any(c(FPOUT3MO, FPOUT6MO, FPOUTYR), ~.x$FC == FALSE) ~ FALSE,
    ),
    IMP = case_when(
      if_any(c(FPOUT3MO, FPOUT6MO, FPOUTYR), ~.x$IMP) ~ TRUE,
      if_any(c(FPOUT3MO, FPOUT6MO, FPOUTYR), ~.x$IMP == FALSE) ~ FALSE,
    ),
    IUD = case_when(
      if_any(c(FPOUT3MO, FPOUT6MO, FPOUTYR), ~.x$IUD) ~ TRUE,
      if_any(c(FPOUT3MO, FPOUT6MO, FPOUTYR), ~.x$IUD == FALSE) ~ FALSE,
    ),
    PILL = case_when(
      if_any(c(FPOUT3MO, FPOUT6MO, FPOUTYR), ~.x$PILL) ~ TRUE,
      if_any(c(FPOUT3MO, FPOUT6MO, FPOUTYR), ~.x$PILL == FALSE) ~ FALSE,
    ),
    INJANY = case_when(
      if_any(c(FPOUT3MO, FPOUT6MO, FPOUTYR), ~.x$INJANY) ~ TRUE,
      if_any(c(FPOUT3MO, FPOUT6MO, FPOUTYR), ~.x$INJANY == FALSE) ~ FALSE,
    )
  )

# Add summary variables 
dat$FPRECOUT <- recout %>% 
  mutate(
    SHORT = case_when(
      if_any(shorts()) ~ TRUE,
      if_any(shorts(), ~.x == FALSE) ~ FALSE
    ),
    LONG = case_when(
      if_any(longs()) ~ TRUE,
      if_any(longs(), ~.x == FALSE) ~ FALSE
    ),
    EMRG = EMRG,
    CORE8 = case_when(
      if_any(c(SHORT, LONG)) ~ TRUE,
      if_any(c(SHORT, LONG), ~.x == FALSE) ~ FALSE
    )
  ) %>% 
  core_lbl("if provided")

dat <- dat %>% 
  select(-c(FPOUT3MO, FPOUT6MO, FPOUTYR)) %>% 
  varlbl_prefix(
    FPRECOUT = "Recent or current stockout"
  ) 
```

# Current Stockout Methods Only 

Facilities that provide a method *and were currently out of stock* are asked one additional follow-up question: 

  * How many days has the method been out of stock?

```{r}
dat$FPOUTDAY <- dat$FPOUTDAY %>% 
  core8() %>% 
  mutate(
    across(everything(), ~case_when(.x < 9990 ~ .x %>% zap_labels)),
    SHORT = case_when(
      !if_all(shorts(), is.na) ~
        cur_data() %>% select(shorts()) %>% rowMeans(na.rm = T)
    ),
    LONG = case_when(
      !if_all(longs(), is.na) ~
        cur_data() %>% select(longs()) %>% rowMeans(na.rm = T)
    ),
    EMRG = EMRG,
    CORE8 = case_when(
      !if_all(c(shorts(), longs()), is.na) ~
        cur_data() %>% select(shorts(), longs()) %>% rowMeans(na.rm = T)
    )
  ) %>% 
  core_lbl("if provided & current stockout")

dat <- dat %>% 
  varlbl_prefix(
    FPOUTDAY = "Days out"
  )
```


# Collapse Facility Types 

We create `FACILITYTYPE_3` to describe `FACILITYTYPEGEN` in three categories:

  * "Direct Care" designates a hospital, health center, health clinic, private practice, or "other health facility"
  * "Retailer" designates a dispensary, pharmacy, chemist, drug shop, boutique, or "shop"
  * "Other" designates "other" 

Also, create a two-category version of authority
  
```{r}
dat <- dat %>% 
  mutate(
    FACILITYTYPE_3 = case_when(
      FACILITYTYPEGEN < 6 ~ "Direct Care",
      FACILITYTYPEGEN < 9 ~ "Retailer",
      FACILITYTYPEGEN == 9 ~ "Other") %>% 
      fct_relevel("Direct Care", "Retailer", "Other"),
    DIRECT_CARE = FACILITYTYPE_3 == "Direct Care",
    PUBLIC = AUTHORITY == 1,
    PRIVATE = AUTHORITY == 4,
    NGO = AUTHORITY == 2
  )

dat <- dat %>% 
  set_variable_labels(
    FACILITYTYPE_3 = "Facility type",
    DIRECT_CARE = "Facilty provides direct care",
    PUBLIC = "Public-sector facility",
    PRIVATE = "Private-sector facility",
    NGO = "NGO facility",
  )
```


# CHVs (Community Health Volunteers)

```{r}
dat$CHV <- dat$CHV %>% 
  mutate(across(everything(), ~lbl_na_if(.x, ~.val > 90) == 1)) %>% 
  transmute(
    SHORT = case_when(
      if_any(c(CHVCON, CHVPILL)) ~ TRUE,
      if_any(c(CHVCON, CHVPILL), ~.x == FALSE) ~ FALSE
    ),
    LONG = CHVINJ,
    CORE8 = case_when(
      if_any(c(SHORT, LONG)) ~ TRUE,
      if_any(c(SHORT, LONG), ~.x == FALSE) ~ FALSE
    )
  ) %>% 
  varlbl_suffix(
    CHVCON = "male condoms",
    CHVINJ = "injectables",
    CHVPILL = "pill",
  ) %>% 
  core_lbl()

dat <- dat %>% varlbl_prefix(CHV = "CHVs provide") 
```



# Mobile Outreach 

```{r}
dat$MOBILE <- dat$MOBILE %>% 
  mutate(
    across(everything(), ~lbl_na_if(.x, ~.val > 90) == 1),
    RECOUTREACH = case_when(
      if_any(starts_with("OUTREACH")) ~ TRUE,
      if_any(starts_with("OUTREACH"), ~.x == FALSE) ~ FALSE
    )
  ) %>% 
  set_variable_labels(
    OUTREACH6MO = "Mobile outreach team visited within 6 months",
    OUTREACHYR = "Mobile outreach team visited within 12 months",
    RECOUTREACH = "Mobile outreach team visited recently"
  )
```



# Has 

Keep only water and electricity; coerce as logicals. Because these are often controls in our DID models, I'll unpack them right away. 

```{r}
dat$HAS <- dat$HAS %>% 
  mutate(across(everything(), ~lbl_na_if(.x, ~.val > 2) > 0)) %>%
  select(HASELECTRC, HASWATER) %>% 
  rename_with(~str_remove(.x, "HAS")) %>% 
  set_variable_labels(
    ELECTRC = "Has today: electricity",
    WATER = "Has today: water"
  )

dat <- dat %>% unpack(HAS, names_sep = "_")
```



# Write clean SDP data with lower-case names

```{r}
# write final sdp data set
dat %>%
  set_variable_labels(
     YEAR = "SDP interview year"
  ) %>%
  rename_with(tolower) %>%
  mutate(across(
    where(is_tibble),
    ~.x %>%
      rename_with(tolower) %>%
      ungroup()
  )) %>%
  write_rds(here("data-clean/plgha_sdp_df.rds"))
  
```

# Example data dictionary 

```{r}
clean <- here("data-clean/plgha_sdp_df.rds") %>% read_rds()

clean %>%
  unpack(where(is_tibble), names_sep = "_") %>%
  ipums_var_info() %>%
  select(var_name, var_label) %>%
  print(n = Inf)

```

